import csv
import json
import re
import time

import requests
from bs4 import BeautifulSoup
# Listing URL requested by this script; the single placeholder receives the
# page number.
SEARCH_URL_TEMPLATE = 'https://search.51job.com/list/000000,000000,0000,00,9,99,java,2,{}.html?'

# CSV file that scraped rows are appended to.
OUTPUT_PATH = 'C:\\Users\\DELL\\Desktop\\51jobs.csv'

# Prefix of the inline JavaScript assignment that carries the JSON payload.
RESULT_MARKER = 'window.__SEARCH_RESULT__ = '

# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10

# Built once instead of on every page iteration.
# NOTE(review): the cookie is a captured browser session embedded in source;
# it has presumably expired and would be better loaded from configuration.
REQUEST_HEADERS = {
    "host": "search.51job.com",
    "cookie": '__uab_collina=165467546074625328692927; '
              'guid=cd75c776ea8008ba89f44432d841b892;'
              ' sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%22cd75c776ea8008ba89f44432d841b892%22%2C%22first_id%22%3A%221814257a6d81bc-0bfd8ab5e76b1b-26021b51-1327104-1814257a6d9e8f%22%2C%22props%22%3A%7B%7D%2C%22identities%22%3A%22eyIkaWRlbnRpdHlfY29va2llX2lkIjoiMTgxNDI1N2E2ZDgxYmMtMGJmZDhhYjVlNzZiMWItMjYwMjFiNTEtMTMyNzEwNC0xODE0MjU3YTZkOWU4ZiIsIiRpZGVudGl0eV9sb2dpbl9pZCI6ImNkNzVjNzc2ZWE4MDA4YmE4OWY0NDQzMmQ4NDFiODkyIn0%3D%22%2C%22history_login_id%22%3A%7B%22name%22%3A%22%24identity_login_id%22%2C%22value%22%3A%22cd75c776ea8008ba89f44432d841b892%22%7D%2C%22%24device_id%22%3A%221814257a6d81bc-0bfd8ab5e76b1b-26021b51-1327104-1814257a6d9e8f%22%7D; '
              'nsearch=jobarea%3D%26%7C%26ord_field%3D%26%7C%26recentSearch0%3D%26%7C%26recentSearch1%3D%26%7C%26recentSearch2%3D%26%7C%26recentSearch3%3D%26%7C%26recentSearch4%3D%26%7C%26collapse_expansion%3D; _ujz=MjExNzMyNjAxMA%3D%3D; ps=needv%3D0; slife=lowbrowser%3Dnot%26%7C%26lastlogindate%3D20220608%26%7C%26securetime%3DBjpWY1MwAmICZwM9WmJZN1dhVmQ%253D;'
              ' privacy=1654704695;'
              ' acw_tc=2f624a5716547326611051604e2d34d658b8d3bc59fa1e7fd58e70e28fd298; acw_sc__v2=62a13776111eb7688c3b2a20ba042cec5ef707f0; search=jobarea%7E%60000000%7C%21ord_field%7E%600%7C%21recentSearch0%7E%60000000%A1%FB%A1%FA000000%A1%FB%A1%FA0000%A1%FB%A1%FA00%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA9%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA0%A1%FB%A1%FAjava%A1%FB%A1%FA2%A1%FB%A1%FA1%7C%21recentSearch1%7E%60000000%A1%FB%A1%FA000000%A1%FB%A1%FA0000%A1%FB%A1%FA00%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA9%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA0%A1%FB%A1%FApython%A1%FB%A1%FA2%A1%FB%A1%FA1%7C%21recentSearch2%7E%60040000%A1%FB%A1%FA000000%A1%FB%A1%FA0000%A1%FB%A1%FA00%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA9%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA0%A1%FB%A1%FAjava%A1%FB%A1%FA2%A1%FB%A1%FA1%7C%21; '
              'ssxmod_itna=iqRx0DciG=0=e0LxYKBChK=0QMKD5wX6YbdD/IiGDnqD=GFDK40oYBhiYDOQ9recBe0=FijO2ENdKAeBEKM91RDadiDCPGnDB9I1i/DxiinDCeDIDWeDiDG4GmB4GtDpxG=Dj0FUZMUxi3Dbh=Df4DmDGY9QqDgDYQDGuP6D7QDIw6ecf2SfCU=bC+o7qDMUeGXiDaq7dRWHcGZbQnrPleuDB6CxBjMfwNUfeDHzLNlemvI9ODYQG52iYp=IBq4K0Dm8I+o/fxIQBONK0woQY32fBOcxDf=ek1xD; ssxmod_itna2=iqRx0DciG=0=e0LxYKBChK=0QMKD5wX6YbG9QVDmEExGX0NGa9KimHk1x8OrazymKRF0khxGfi/f5R1fIGuATakmUq2jvFo0q26vqU+0SFOQ5Y9l5zzb+mZU=V4tdU3cTUPs4xGTj+N0IDtzlk5MQeAPU8YjUb=2+T6DghRL+3AL3d3/Dfbg/kuN5ZoYoZtWkqYo6mcLALcisCRP5u9BCCNzCm2B7C5OxTbTD7QqCxGcDiQqeD==',
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36",
}


def build_page_url(page_no):
    """Return the listing URL for page number *page_no*."""
    return SEARCH_URL_TEMPLATE.format(page_no)


def parse_search_result(script_text):
    """Decode the JSON object assigned to ``window.__SEARCH_RESULT__``.

    Args:
        script_text: Source of the inline script holding the assignment.

    Returns:
        The decoded payload (a dict for the pages this script targets).

    Raises:
        ValueError: If the marker is absent or the text following it is not
            valid JSON (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    _, marker, payload = script_text.partition(RESULT_MARKER)
    if not marker:
        raise ValueError('search result marker not found in script text')
    # Convert the JSON text into a dictionary.
    return json.loads(payload)


def extract_job_rows(payload):
    """Return one ``(name, salary, company, href)`` tuple per listed job.

    Args:
        payload: Decoded search result; its ``engine_jds`` entry is iterated.

    Raises:
        KeyError: If ``engine_jds`` or any of the four per-job fields is
            missing.
    """
    # Pull the fields we want out of each job dictionary.
    return [
        (
            job['job_name'],
            job['providesalary_text'],
            job['company_name'],
            job['company_href'],
        )
        for job in payload['engine_jds']
    ]


def fetch_page_jobs(page_no):
    """Download one listing page and return its job rows.

    Raises:
        requests.RequestException: On timeout, connection failure or a
            non-success HTTP status.
        ValueError: If no inline script on the page carries the payload.
    """
    response = requests.get(
        url=build_page_url(page_no),
        headers=REQUEST_HEADERS,
        timeout=REQUEST_TIMEOUT,  # never hang forever on a stalled connection
    )
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    # Locate the payload by its marker instead of a fixed script index, so an
    # extra or missing <script> tag does not silently select the wrong one.
    for script in soup.find_all('script', attrs={'type': 'text/javascript'}):
        source = script.string or ''
        if RESULT_MARKER in source:
            return extract_job_rows(parse_search_result(source))
    raise ValueError(
        'no script containing {!r} on page {}'.format(RESULT_MARKER, page_no)
    )


def main():
    """Prompt for a page count, scrape that many pages, append rows to CSV."""
    page = int(input("输入你想爬取的页数："))
    # Open the output once for the whole run. newline='' lets the csv module
    # control line endings, and csv.writer quotes fields containing commas or
    # quotes, which plain string formatting would turn into broken rows.
    with open(OUTPUT_PATH, 'a', encoding='utf8', newline='') as f:
        writer = csv.writer(f)
        # NOTE(review): pages are requested starting at 1 so the request
        # matches the progress message; the previous code requested index 0
        # while reporting page 1 — confirm the site's numbering.
        for page_no in range(1, page + 1):
            print("正在爬取第{}页".format(page_no))
            writer.writerows(fetch_page_jobs(page_no))


if __name__ == "__main__":
    main()











